# encoding: utf-8
import re
import requests
# from fake_useragent import UserAgent
import json
from lxml import html
import io
import sys
import urllib3

# Globally silence the InsecureRequestWarning raised by every request
# below that passes verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Kuaidaili tunnel-proxy endpoint (host:port).
tunnel = "tps137.kdlapi.com:15818"
# Username/password proxy authentication.
# SECURITY NOTE(review): credentials are hard-coded in source — move them
# to environment variables or a config file before committing/sharing.
username = "t12332534335831"
password = "s0c8j1zn"
# Proxy map in the shape requests expects; same authenticated tunnel for
# both schemes. NOTE(review): defined but not used by the active code path.
proxie = {
    "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
    "https": "https://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
}
# Re-wrap stdout so all print() output is encoded as GB18030 — presumably
# targeting a Chinese Windows console; confirm before changing.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')
# Alias the lxml element-tree module used for HTML parsing below.
etree = html.etree
# ua = UserAgent()
"""获取页面内容"""
# (The bare string above — "fetch page content" — is a no-op statement
# left byte-identical; it acts as an informal section marker.)
# Request headers for the Zhihu API: desktop Chrome UA plus a session Cookie.
# SECURITY NOTE(review): the Cookie embeds personal session tokens.
kv = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
    'Cookie': '_zap=fe74b60e-3a61-40c5-b60b-386a19be7713; d_c0="AJBffIkJ_hKPToyVi7Y1lfKszodp8YbT8ng=|1619065796"; z_c0=Mi4xUkRINEJ3QUFBQUFBa0Y5OGlRbi1FaGNBQUFCaEFsVk44czE0WVFEa2xXSllCVjBjMGZ6LUpmeDMxZC1kVTdyWHlR|1619754994|0499a9b4c08303b851826aa02d354782e9d69860; _xsrf=83f02431-38a8-4515-9b65-c6bf77dbcbcb; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1626348892,1626350315,1626401558,1626491662; JOID=Vl4UBkMXz_IW9bepJBYhafR2v1kzJPCjeLPixVBigYFbrtP8UxyHCXf7sqotn8oC7k_loecNkUjflKruykKxy60=; osd=W14cB0waz_oX-rqpLBcuZPR-vlY-JPiid77izVFtjIFTr9zxUxSGBnr7uqsiksoK70Dooe8MnkXfnKvhx0K5yqI=; tst=r; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1626491697; KLBRSID=57358d62405ef24305120316801fd92a|1626491701|1626491659'
}
# Cookie-less headers, used for the novel site (123wxwx).
kv1 = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
    # 'cookie': attach your own cookie here (original note in Chinese)
}
# Blocked words: any feed entry whose question title contains one of
# these substrings is filtered out instead of printed.
k = ["原神", "开水烫蟑螂", "中国象棋", "王者荣耀", "基本款键盘", '大司马版本的"肌肉金轮"', "选购键盘", "明星颜值被吹上天", "iPhone", "李逍遥的这个皮肤是否值得入手", "python",
     "Python", "开头写一篇小说", "写一个故事", "为开头", "论文查重", "病娇", "打王者连王者", "明日方舟", "为开头写个故事", "摩尔庄园", "少年团", "利路修", "新鲜出炉的鳕鱼堡",
     "大司马因肌肉金轮的视频", "灵笼", "机械键盘", "耳屎", "追妻火葬场"]
# Accumulates the blocked titles encountered so far; printed by spider_1.
c = []
def spider_1(e):
    """Fetch one page of the Zhihu recommend feed at URL *e* and print answers.

    Only entries whose ``verb`` is ``TOPIC_ACKNOWLEDGED_ANSWER`` are
    considered.  An entry whose question title contains any blocked word
    from the module-level list ``k`` is recorded in ``c`` (once) instead of
    being printed; all others have their title and the plain text of the
    answer HTML printed, surrounded by marker URLs.

    :param e: full Zhihu API URL (including query string) to fetch.
    """
    # verify=False matches the rest of the file (warnings are silenced at
    # import time). NOTE(review): consider re-enabling cert verification.
    resp = requests.get(e, headers=kv, timeout=15, verify=False).text
    it = json.loads(resp)
    for i in it['data']:
        if i['verb'] != 'TOPIC_ACKNOWLEDGED_ANSWER':
            continue
        title = i['target']['question']['title']
        # Fix: the original appended the title once PER matching keyword,
        # producing duplicate entries in ``c``; record it exactly once.
        if any(j in title for j in k):
            c.append(title)
            continue
        # Marker URL printed x2 before / x4 after each answer (unchanged).
        print(title + 'https://www.123wxwx.com/html/9/9621/77649206.html' * 2)
        soup = etree.HTML(i['target']['content'])
        # Flatten the answer HTML down to its concatenated text nodes.
        print(''.join(soup.xpath('//text()')))
        print('https://www.123wxwx.com/html/9/9621/77649206.html' * 4)
    print(c)


def szz1():
    """Scrape the 123wxwx chapter index page and print its text content.

    Prints the "last" navigation link's text and href, then each text node
    of the ``#content`` div prefixed with a chapter URL stub.
    """
    # Fix: added timeout (was missing — a stalled server would hang the
    # script forever; spider_1 already uses timeout=15).
    r = requests.get('https://www.123wxwx.com/html/9/9621', headers=kv1,
                     timeout=15, verify=False).content
    sp = etree.HTML(r)
    # Text and href of the "last" pagination/navigation anchor.
    li = sp.xpath('//span[@class="last"]/a/text()')
    zz = sp.xpath('//span[@class="last"]/a/@href')
    # Fix: was input(li + zz) — a leftover debugging pause that blocked
    # every run waiting on stdin; report the values without blocking.
    print(li + zz)
    for seg in sp.xpath('//div[@id="content"]/text()'):
        # URL stub prefix preserved byte-for-byte from the original.
        print('https://www.123wxwx.com/html/9/9621/77605968' + seg)


if __name__ == '__main__':
    # Entry point: only the novel-site scraper runs. The Zhihu feed
    # spider call below is intentionally left commented out.
    # spider_1(
    #     'https://www.zhihu.com/api/v3/feed/topstory/recommend?session_token=8a6cb7553fec62ca3467d600605a7600&desktop=true&page_number=4&limit=6&action=down&after_id=17&ad_interval=-1')
    szz1()
    # input('')
